plus sn10x, SI

Baf53cre ENS neurons, SI data
CR infection 7d, CTL x4 CKO(Ahr-cko) x4

loading 60k cells,
cellranger called 35,059 cells

load dependancies

load 10x data

filt.10x <- Read10X(data.dir = "../output_plus/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32285 35059
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGAAGTCTA-1 AAACCCAAGACATACA-1 AAACCCAAGACTCATC-1
## Xkr4                     .                  1                  .
## Gm1992                   .                  1                  2
## Gm19938                  1                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCAAGAGGATGA-1 AAACCCAAGAGGTTTA-1 AAACCCAAGATACCAA-1
## Xkr4                     4                  2                  .
## Gm1992                   1                  1                  .
## Gm19938                  1                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]     8 35059
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
##       AAACCCAAGAAGTCTA-1 AAACCCAAGACATACA-1 AAACCCAAGACTCATC-1
## CTL.1                 56                 93                 82
## CTL.2                 12                 24                 15
## CTL.3                 60                160                 21
## CTL.4                 32                 39                 93
## CKO.1                 36                 92                 51
## CKO.2                  .                  1                  1
## CKO.3                 68                 80                 71
## CKO.4                127                195                191
##       AAACCCAAGAGGATGA-1 AAACCCAAGAGGTTTA-1 AAACCCAAGATACCAA-1
## CTL.1                119                969                346
## CTL.2                144                 31                 18
## CTL.3                 21                 26                 18
## CTL.4                 40                 32                 38
## CKO.1                 79                 83                 57
## CKO.2                  2                  1                  2
## CKO.3                243                124                220
## CKO.4                238                284                198
rowSums(FB)
##    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2    CKO.3    CKO.4 
##  6725956  1213800  1019608  1958441  3855728    44350  4876856 10727664
rowSums(FB>0)
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4 
## 35054 34991 35023 35047 35046 17297 35055 35056

FB

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "CR7d_SI")

FB.seur
## An object of class Seurat 
## 8 features across 35059 samples within 1 assay 
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "CTL.1" "CTL.2" "CTL.3" "CTL.4" "CKO.1" "CKO.2" "CKO.3" "CKO.4"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

first define FB colors based on conditions

color.FB <- ggsci::pal_d3("category20c")(20)[c(1,6,11,16,
                                               2,7,12,17
                                              
                                               )]


level.FB <- c(rownames(FB.seur))

color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 6)

scales::show_col(color.FB, ncol = 4)

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])

plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])

range ‘q’

q0.50 ~ 0.99
table.demux
##    quantile Doublet CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4 Negative
## 1      0.50   32609   329   293   279   346   328    45   262   274      294
## 2      0.51   32500   344   317   298   347   334    48   272   287      312
## 3      0.52   32317   372   329   334   368   342    51   298   318      330
## 4      0.53   32248   381   338   346   367   358    52   304   318      347
## 5      0.54   32038   411   365   342   403   392    54   333   351      370
## 6      0.55   31958   416   381   357   416   398    54   339   357      383
## 7      0.56   31812   433   379   378   429   430    55   354   380      409
## 8      0.57   31751   437   389   395   440   431    55   360   382      419
## 9      0.58   31611   447   413   423   456   447    55   371   393      443
## 10     0.59   31441   478   432   418   483   477    59   391   417      463
## 11     0.60   31142   530   457   457   521   515    61   431   452      493
## 12     0.61   31056   539   471   470   533   520    62   446   461      501
## 13     0.62   30980   542   481   486   532   535    63   455   472      513
## 14     0.63   30761   573   516   494   581   567    66   477   491      533
## 15     0.64   30492   609   535   530   617   609    68   507   518      574
## 16     0.65   30409   616   551   545   635   607    71   515   525      585
## 17     0.66   30257   635   571   567   642   626    76   521   542      622
## 18     0.67   30233   633   575   568   644   631    76   522   545      632
## 19     0.68   29815   693   620   610   700   699    82   574   590      676
## 20     0.69   29716   697   633   630   716   707    82   586   596      696
## 21     0.70   29579   710   655   658   728   725    84   595   603      722
## 22     0.71   29202   771   682   691   781   784    88   629   663      768
## 23     0.72   29019   793   717   718   793   808    88   643   683      797
## 24     0.73   28888   809   744   738   796   816    91   659   696      822
## 25     0.74   28701   839   749   772   826   837    93   665   718      859
## 26     0.75   28368   891   814   796   856   888    96   689   759      902
## 27     0.76   28241   899   832   819   859   897    97   703   766      946
## 28     0.77   28019   919   843   856   894   920   101   735   790      982
## 29     0.78   27744   960   881   873   922   956   105   767   816     1035
## 30     0.79   27484   984   900   921   955   989   109   788   842     1087
## 31     0.80   27355   996   915   942   960   997   113   800   851     1130
## 32     0.81   26982  1049   946   960  1015  1048   124   836   886     1213
## 33     0.82   26805  1074   974   986  1031  1061   127   833   898     1270
## 34     0.83   23185  1623  1483  1416  1582  1572    82  1204  1259     1653
## 35     0.84   22994  1638  1517  1440  1599  1593    87  1203  1268     1720
## 36     0.85   22369  1725  1605  1488  1684  1682    92  1253  1330     1831
## 37     0.86   22093  1750  1637  1533  1705  1705    96  1262  1354     1924
## 38     0.87   21604  1808  1681  1568  1771  1783   105  1293  1389     2057
## 39     0.88   21161  1871  1711  1647  1813  1832   112  1307  1405     2200
## 40     0.89   20713  1923  1777  1669  1863  1882   117  1328  1437     2350
## 41     0.90   20330  1966  1809  1729  1912  1919   121  1351  1432     2490
## 42     0.91   19686  2043  1881  1786  1976  1995   138  1393  1462     2699
## 43     0.92   19114  2107  1937  1825  2013  2073   155  1407  1489     2939
## 44     0.93   18485  2193  2005  1870  2062  2115   158  1414  1528     3229
## 45     0.94   17819  2281  2079  1944  2109  2171   167  1397  1556     3536
## 46     0.95   17107  2344  2126  2001  2178  2238   178  1405  1564     3918
## 47     0.96   16356  2406  2183  2056  2214  2290   195  1416  1535     4408
## 48     0.97   15384  2488  2267  2096  2272  2394   205  1388  1510     5055
## 49     0.98   14144  2583  2358  2110  2360  2494   224  1370  1486     5930
## 50     0.99   10058  3024  2764  2451  2695  2972   167  1371  1536     8021
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CTL|CKO",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

#plot(table.demux$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])

q0.9900 ~ 0.9999
#table.demux.s
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))


plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CTL|CKO",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")

#plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])

demultiplexing

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.91)
## Cutoff for CTL.1 : 140 reads
## Cutoff for CTL.2 : 27 reads
## Cutoff for CTL.3 : 25 reads
## Cutoff for CTL.4 : 53 reads
## Cutoff for CKO.1 : 79 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 153 reads
## Cutoff for CKO.4 : 354 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    19686     2699    12674
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    19686     2699     2043     1881     1786     1976     1995      138 
##    CKO.3    CKO.4 
##     1393     1462
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.92)
## Cutoff for CTL.1 : 145 reads
## Cutoff for CTL.2 : 28 reads
## Cutoff for CTL.3 : 26 reads
## Cutoff for CTL.4 : 55 reads
## Cutoff for CKO.1 : 81 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 159 reads
## Cutoff for CKO.4 : 366 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    19114     2939    13006
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    19114     2939     2107     1937     1825     2013     2073      155 
##    CKO.3    CKO.4 
##     1407     1489
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.93)
## Cutoff for CTL.1 : 150 reads
## Cutoff for CTL.2 : 29 reads
## Cutoff for CTL.3 : 27 reads
## Cutoff for CTL.4 : 57 reads
## Cutoff for CKO.1 : 84 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 166 reads
## Cutoff for CKO.4 : 380 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    18485     3229    13345
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    18485     3229     2193     2005     1870     2062     2115      158 
##    CKO.3    CKO.4 
##     1414     1528
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.94)
## Cutoff for CTL.1 : 156 reads
## Cutoff for CTL.2 : 30 reads
## Cutoff for CTL.3 : 28 reads
## Cutoff for CTL.4 : 59 reads
## Cutoff for CKO.1 : 88 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 173 reads
## Cutoff for CKO.4 : 396 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    17819     3536    13704
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    17819     3536     2281     2079     1944     2109     2171      167 
##    CKO.3    CKO.4 
##     1397     1556
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.95)
## Cutoff for CTL.1 : 163 reads
## Cutoff for CTL.2 : 32 reads
## Cutoff for CTL.3 : 29 reads
## Cutoff for CTL.4 : 61 reads
## Cutoff for CKO.1 : 92 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 182 reads
## Cutoff for CKO.4 : 415 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    17107     3918    14034
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    17107     3918     2344     2126     2001     2178     2238      178 
##    CKO.3    CKO.4 
##     1405     1564
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.96)
## Cutoff for CTL.1 : 172 reads
## Cutoff for CTL.2 : 34 reads
## Cutoff for CTL.3 : 30 reads
## Cutoff for CTL.4 : 65 reads
## Cutoff for CKO.1 : 97 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 192 reads
## Cutoff for CKO.4 : 438 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    16356     4408    14295
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    16356     4408     2406     2183     2056     2214     2290      195 
##    CKO.3    CKO.4 
##     1416     1535
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.97)
## Cutoff for CTL.1 : 183 reads
## Cutoff for CTL.2 : 36 reads
## Cutoff for CTL.3 : 32 reads
## Cutoff for CTL.4 : 69 reads
## Cutoff for CKO.1 : 103 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 205 reads
## Cutoff for CKO.4 : 467 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    15384     5055    14620
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    15384     5055     2488     2267     2096     2272     2394      205 
##    CKO.3    CKO.4 
##     1388     1510
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.98)
## Cutoff for CTL.1 : 198 reads
## Cutoff for CTL.2 : 39 reads
## Cutoff for CTL.3 : 35 reads
## Cutoff for CTL.4 : 74 reads
## Cutoff for CKO.1 : 111 reads
## Cutoff for CKO.2 : 1 reads
## Cutoff for CKO.3 : 224 reads
## Cutoff for CKO.4 : 507 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    14144     5930    14985
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    14144     5930     2583     2358     2110     2360     2494      224 
##    CKO.3    CKO.4 
##     1370     1486
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for CTL.1 : 224 reads
## Cutoff for CTL.2 : 45 reads
## Cutoff for CTL.3 : 39 reads
## Cutoff for CTL.4 : 83 reads
## Cutoff for CKO.1 : 125 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 256 reads
## Cutoff for CKO.4 : 575 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    10058     8021    16980
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##    10058     8021     3024     2764     2451     2695     2972      167 
##    CKO.3    CKO.4 
##     1371     1536
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.992)
## Cutoff for CTL.1 : 232 reads
## Cutoff for CTL.2 : 46 reads
## Cutoff for CTL.3 : 40 reads
## Cutoff for CTL.4 : 86 reads
## Cutoff for CKO.1 : 130 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 266 reads
## Cutoff for CKO.4 : 597 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9498     8535    17026
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     9498     8535     3020     2809     2474     2695     2987      173 
##    CKO.3    CKO.4 
##     1355     1513
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for CTL.1 : 242 reads
## Cutoff for CTL.2 : 49 reads
## Cutoff for CTL.3 : 42 reads
## Cutoff for CTL.4 : 90 reads
## Cutoff for CKO.1 : 136 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 279 reads
## Cutoff for CKO.4 : 625 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     8760     9292    17007
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     8760     9292     3061     2809     2452     2698     3013      185 
##    CKO.3    CKO.4 
##     1331     1458
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for CTL.1 : 256 reads
## Cutoff for CTL.2 : 52 reads
## Cutoff for CTL.3 : 44 reads
## Cutoff for CTL.4 : 95 reads
## Cutoff for CKO.1 : 144 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 297 reads
## Cutoff for CKO.4 : 663 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     7948    10280    16831
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     7948    10280     3047     2830     2458     2638     3010      189 
##    CKO.3    CKO.4 
##     1294     1365
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for CTL.1 : 281 reads
## Cutoff for CTL.2 : 57 reads
## Cutoff for CTL.3 : 48 reads
## Cutoff for CTL.4 : 104 reads
## Cutoff for CKO.1 : 157 reads
## Cutoff for CKO.2 : 2 reads
## Cutoff for CKO.3 : 327 reads
## Cutoff for CKO.4 : 729 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     6757    11921    16381
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     6757    11921     2957     2848     2382     2509     2999      199 
##    CKO.3    CKO.4 
##     1226     1261
# seems have to select a much suitable cutoff for CKO.2 based on its counts distribution
# others could check q0.95~q0.99 to choose a better one
# 
# CTL.1  q0.994  242
# CTL.2  q0.99   45
# CTL.3  q0.99   39
# CTL.4  q0.98   74   
# CKO.1  q0.994  136
# CKO.2  UMI 330
# CKO.3  q0.98   224
# CKO.4  q0.95   415
#


cutoff.FB <- c(248,45,39,72,
               156,330,224,430)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4 
##   248    45    39    72   156   330   224   430
par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])

plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

must process CKO.2 specifically, or the overall result could be messed up

par(mfrow=c(2,4))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])

plot(sort(t(FB)[,5], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}

df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 35059     2
df.FB[1:10,]
##                    HTO_classification.global hash.ID
## AAACCCAAGAAGTCTA-1                   Singlet   CTL.3
## AAACCCAAGACATACA-1                   Singlet   CTL.3
## AAACCCAAGACTCATC-1                   Singlet   CTL.4
## AAACCCAAGAGGATGA-1                   Doublet Doublet
## AAACCCAAGAGGTTTA-1                   Singlet   CTL.1
## AAACCCAAGATACCAA-1                   Singlet   CTL.1
## AAACCCAAGATGATTG-1                   Doublet Doublet
## AAACCCAAGGCTCTAT-1                   Singlet   CKO.4
## AAACCCAAGGTCTTTG-1                   Singlet   CKO.3
## AAACCCAAGGTTGGAC-1                   Singlet   CTL.4
## custom cutoff run this
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
##                  Doublet     8617        0     0     0     0     0     0     0
##                  Negative       0     7359     0     0     0     0     0     0
##                  Singlet        0        0  3032  2930  2709  3294  2728     9
##                          hash.ID
## HTO_classification.global CKO.3 CKO.4
##                  Doublet      0     0
##                  Negative     0     0
##                  Singlet   1851  2530
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                    orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAAGTCTA-1    CR7d_SI        391            7        391            7
## AAACCCAAGACATACA-1    CR7d_SI        684            8        684            8
## AAACCCAAGACTCATC-1    CR7d_SI        525            8        525            8
## AAACCCAAGAGGATGA-1    CR7d_SI        886            8        886            8
##                    HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAAGTCTA-1     CTL.3        CTL.4  0.7347680              CTL.3
## AAACCCAAGACATACA-1     CTL.3        CKO.1  1.3023967              CTL.3
## AAACCCAAGACTCATC-1     CTL.4        CTL.3  0.5069012           Negative
## AAACCCAAGAGGATGA-1     CTL.2        CKO.3  0.7489803              CTL.2
##                    HTO_classification.global hash.ID
## AAACCCAAGAAGTCTA-1                   Singlet   CTL.3
## AAACCCAAGACATACA-1                   Singlet   CTL.3
## AAACCCAAGACTCATC-1                   Singlet   CTL.4
## AAACCCAAGAGGATGA-1                   Doublet Doublet
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,23500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1280,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
          cols = color.FB) 

with ridge plots, filtered
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
##FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
##FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)

#FB.seur.subset <- RunPCA(FB.seur.subset, features = setdiff(rownames(FB.seur.subset),"CKO.2"), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:7, perplexity = 300, check_duplicates = FALSE)

#saveRDS(FB.seur.subset, "FB1222.seur.subset.rds")
FB.seur.subset <- readRDS("FB1222.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern

stat

FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
                                            levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))

VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)

table(FB.seur@meta.data$hash.ID.sort)
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     8617     7359     3032     2930     2709     3294     2728        9 
##    CKO.3    CKO.4 
##     1851     2530
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,12800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "CR7d_SI")
GEX.seur
## An object of class Seurat 
## 24081 features across 35059 samples within 1 assay 
## Active assay: RNA (24081 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CKO.1    CKO.2 
##     8617     7359     3032     2930     2709     3294     2728        9 
##    CKO.3    CKO.4 
##     1851     2530
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative" & FB.info!="CKO.2")
GEX.seur
## An object of class Seurat 
## 24081 features across 19074 samples within 1 assay 
## Active assay: RNA (24081 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset = percent.mt < 5 & nFeature_RNA > 500 & nFeature_RNA < 4000 & nCount_RNA < 10000)
GEX.seur
## An object of class Seurat 
## 24081 features across 18878 samples within 1 assay 
## Active assay: RNA (24081 features, 0 variable features)

filtered obj

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,10800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+675,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,6800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+475,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
## Warning: The following features are not present in the object: E2f8, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: Cdc25c, not
## searching for symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

no cycling !

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 200)
##   [1] "Wdr17"         "Gal"           "Mgat4c"        "Zfp804a"      
##   [5] "Cntn5"         "Adgrg6"        "Gm30613"       "Klhl1"        
##   [9] "Kcnb2"         "Sst"           "Robo2"         "Gm32647"      
##  [13] "Cdh8"          "Cpne4"         "Gm29521"       "Brinp3"       
##  [17] "Ntng1"         "Cntnap2"       "Gm21847"       "Nrxn3"        
##  [21] "Ano2"          "Cdh19"         "Pdzrn4"        "Cdh9"         
##  [25] "Gpr149"        "Ntrk3"         "Lingo2"        "Nmu"          
##  [29] "Ebf1"          "Nrg1"          "Sgcz"          "Kcnip4"       
##  [33] "Gm15261"       "Tmeff2"        "Zfp804b"       "Dgkg"         
##  [37] "Astn2"         "Pcdh9"         "Cmah"          "Cdh18"        
##  [41] "Grm5"          "Clstn2"        "Myh11"         "Gm29771"      
##  [45] "Car10"         "March1"        "Cemip"         "Prkg1"        
##  [49] "Rbpms"         "Nxph2"         "Cdh6"          "Sema5a"       
##  [53] "Schip1"        "Vip"           "Gpc5"          "Piezo2"       
##  [57] "Chsy3"         "Ano1"          "Gpm6a"         "Vwc2l"        
##  [61] "Grin3a"        "Rasgef1b"      "Fgf14"         "Dcc"          
##  [65] "Penk"          "Meis2"         "Asic2"         "Pbx3"         
##  [69] "4930509J09Rik" "Efr3a"         "Pcdh10"        "Plxna4"       
##  [73] "Col4a6"        "Kcnq5"         "Fmo2"          "4930432L08Rik"
##  [77] "Unc5d"         "Egfem1"        "Itgb6"         "Ccbe1"        
##  [81] "Cysltr2"       "Dkk2"          "Zfhx3"         "Gm15680"      
##  [85] "Cpa6"          "Gm20754"       "Spock3"        "9530059O14Rik"
##  [89] "Myl1"          "Abca9"         "8030451A03Rik" "4931415C17Rik"
##  [93] "Pde1a"         "Gm26632"       "Col25a1"       "Csmd3"        
##  [97] "Acp7"          "Kcnh7"         "Tnr"           "Robo1"        
## [101] "Muc16"         "Trhde"         "Serpini1"      "Skap1"        
## [105] "AC150683.1"    "Dpp10"         "Dgkb"          "Gm15584"      
## [109] "Cacna2d3"      "Bmpr1b"        "4930422I22Rik" "A330008L17Rik"
## [113] "Luzp2"         "Fut9"          "Prkcq"         "Sulf1"        
## [117] "Nxph1"         "Plcl1"         "Gm49938"       "Gm20713"      
## [121] "Cfh"           "Sox2ot"        "Foxp2"         "Gna14"        
## [125] "Gm38505"       "Tac1"          "Trpm5"         "Gm26612"      
## [129] "Gm11342"       "5530401A14Rik" "Csmd1"         "Cntn4"        
## [133] "9530026P05Rik" "Tafa2"         "Chrm3"         "Trps1"        
## [137] "1700034P13Rik" "Pde4d"         "Pcdh11x"       "Bmp5"         
## [141] "Kctd16"        "Arhgap6"       "Cux2"          "Il1rapl1"     
## [145] "Galnt18"       "Kif26b"        "Rab27b"        "Gm49953"      
## [149] "Gm16685"       "Gm13561"       "Kctd8"         "D030068K23Rik"
## [153] "Kcnd2"         "Hpse2"         "mt-Co3"        "Myh3"         
## [157] "Adgrd1"        "Ghr"           "Prune2"        "Arhgap15"     
## [161] "B230312C02Rik" "Calcb"         "Gm49127"       "1700063D05Rik"
## [165] "1700111E14Rik" "mt-Atp6"       "Slc4a4"        "Ano5"         
## [169] "Mme"           "Lama2"         "Hs6st3"        "Grik1"        
## [173] "Dgki"          "Chrna9"        "Gm20560"       "Arhgap18"     
## [177] "Gm11339"       "Rasgrf1"       "Rerg"          "Mecom"        
## [181] "B230323A14Rik" "Galr1"         "Nell1"         "Iqgap2"       
## [185] "Gm28494"       "Galntl6"       "Gm30382"       "Stk31"        
## [189] "Ror1"          "Nos1"          "C7"            "4930587E11Rik"
## [193] "Aebp1"         "Ttc29"         "Gm16226"       "Flrt2"        
## [197] "Gm12239"       "mt-Co1"        "Olfr1369-ps1"  "Fgl2"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Lrrtm4, Galntl6, Tox, Ndst4, Grik1, Bnc2, Pcdh7, Tshz2, Cdc14a, Specc1 
##     Kcnd2, Chrna7, Epha5, Galnt17, Plcxd3, Syt6, Lrp1b, Fbxw15, Zfp536, Brinp2 
##     St6galnac3, Fbxw24, Kcnab1, Synpo2, Ptprt, Dock2, Oprk1, Nos1, Rspo2, Slc35f4 
## Negative:  Ntrk3, Ano2, Tmeff2, Robo2, Cdh8, Cpne4, Nrxn3, Mgat4c, Cntn5, Clstn2 
##     Adgrg6, Zfp804a, Myl1, Spock3, Gpr149, Pdzrn4, Cdh6, Cysltr2, Pcdh10, Grin3a 
##     Sgcz, Dgkg, Astn2, Cacna2d3, Kif26b, Plxna4, Itgb6, Cux2, Iqgap2, Zfhx3 
## PC_ 2 
## Positive:  Rbfox1, Ptprt, Bnc2, Tafa1, Tshz2, Gpc6, Frmd4b, Grik1, St6galnac3, Fbxw15 
##     Tcf7l2, Brinp2, Cdc14a, Plcxd3, Tox, Fbxw24, Tmem132c, Dock2, Pcdh7, Caln1 
##     Pld5, Oprk1, Specc1, Unc5c, Adamtsl1, Slc35f4, Ccbe1, Nfia, Sdk2, Dsc3 
## Negative:  Nos1, Egfem1, Auts2, Kcnq5, Cadps2, Etv1, Epha5, Dach1, Gfra1, Asic2 
##     Kcnab1, Schip1, Alcam, Cmah, Dgkb, Kcnt2, Stxbp6, Rgs6, Hs6st3, Tmem108 
##     Adgrl3, Lrrc4c, Creb5, Cdh11, Fat3, Ebf1, Il1rapl1, Kcnj3, Tenm3, Nxn 
## PC_ 3 
## Positive:  Cdh18, Kcnip4, Csmd3, Kctd8, Klhl1, Gpc6, Cadm2, Htr4, Pbx3, Cntn3 
##     Meis1, Dlc1, Gabrg3, Skap1, Csmd1, Car10, Tenm2, Pde10a, C79798, Serpini1 
##     March1, Dmd, Vwc2l, Unc5c, Sema5a, Khdrbs2, Pde4d, Sphkap, Plcl1, Sv2c 
## Negative:  Sgcd, Adgrg6, Cysltr2, Nos1, Gpr149, Nmu, Slc4a4, Grin3a, Itgb6, Dapk2 
##     Ano2, Dgkg, Rgs6, Nfia, Ccbe1, Cbln2, Ngfr, Kcnab1, Lhfpl2, Cpne4 
##     Otof, Gfra1, Syt2, Zfp536, Zfp804a, Efr3a, Pex7, Calcb, Smad6, Htr3b 
## PC_ 4 
## Positive:  Klhl1, March1, Vwc2l, Sema5a, Pbx3, Cdh9, Zfhx3, Rasgef1b, C79798, Galnt18 
##     Kcnh7, Zbbx, Il1rapl1, Prune2, Olfr78, Lncbate1, Bcl2, Mgat4c, Scgn, Alk 
##     Tmeff2, Kif26b, Dpp10, Ntm, Pakap, Tenm4, Auts2, Serpini1, Vcan, Aff3 
## Negative:  Dock10, Lingo2, Kcnip4, Ndst4, Nxph1, Sorcs1, Csmd3, Tac1, Gda, Lrp1b 
##     Cntn5, Lrrc7, Lrrtm4, Thsd4, Cntn3, Epha5, Nyap2, Dmd, Sgcz, Pld5 
##     Robo1, Lsamp, Kctd8, Abca5, Mctp1, Ccdc60, Lama2, Unc5d, Prkg1, Ryr3 
## PC_ 5 
## Positive:  Kcnt2, Dgkb, Prkg1, Thsd7b, Lrrc4c, Alcam, Cdh20, Khdrbs2, Rgs6, Epha5 
##     Car10, Klhl1, Gucy1a2, Plcl1, Vwc2l, Ptprz1, Stard13, Rasgef1b, Man1a, Vcan 
##     Galntl6, Dlgap1, Mgat4c, Il1rapl1, Hmcn1, Dpp10, Slc44a5, P3h2, Kctd8, Slc2a13 
## Negative:  Trhde, Ebf1, Cntn4, Trps1, Nrg1, Cpa6, Chsy3, Zmat4, Csmd1, Ptprd 
##     Col18a1, Gal, Kcnd2, Nkain3, Ntng1, Rmst, Npas3, Egfem1, Myo1e, Shisa6 
##     Sctr, Sdk1, Plcxd3, Myo16, Ptprt, Luzp2, Fstl4, Moxd1, Snca, Nav2
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene)))
## [1] 1084
setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene))[1:300]
##   [1] "Wdr17"    "Gal"      "Mgat4c"   "Zfp804a"  "Cntn5"    "Adgrg6"  
##   [7] "Klhl1"    "Kcnb2"    "Sst"      "Robo2"    "Cdh8"     "Cpne4"   
##  [13] "Brinp3"   "Ntng1"    "Cntnap2"  "Nrxn3"    "Ano2"     "Cdh19"   
##  [19] "Pdzrn4"   "Cdh9"     "Gpr149"   "Ntrk3"    "Lingo2"   "Nmu"     
##  [25] "Ebf1"     "Nrg1"     "Sgcz"     "Kcnip4"   "Tmeff2"   "Zfp804b" 
##  [31] "Dgkg"     "Astn2"    "Pcdh9"    "Cmah"     "Cdh18"    "Grm5"    
##  [37] "Clstn2"   "Myh11"    "Car10"    "March1"   "Cemip"    "Prkg1"   
##  [43] "Rbpms"    "Nxph2"    "Cdh6"     "Sema5a"   "Schip1"   "Vip"     
##  [49] "Gpc5"     "Piezo2"   "Chsy3"    "Ano1"     "Gpm6a"    "Vwc2l"   
##  [55] "Grin3a"   "Rasgef1b" "Fgf14"    "Dcc"      "Penk"     "Meis2"   
##  [61] "Asic2"    "Pbx3"     "Efr3a"    "Pcdh10"   "Plxna4"   "Col4a6"  
##  [67] "Kcnq5"    "Fmo2"     "Unc5d"    "Egfem1"   "Itgb6"    "Ccbe1"   
##  [73] "Cysltr2"  "Dkk2"     "Zfhx3"    "Cpa6"     "Spock3"   "Myl1"    
##  [79] "Abca9"    "Pde1a"    "Col25a1"  "Csmd3"    "Acp7"     "Kcnh7"   
##  [85] "Tnr"      "Robo1"    "Muc16"    "Trhde"    "Serpini1" "Skap1"   
##  [91] "Dpp10"    "Dgkb"     "Cacna2d3" "Bmpr1b"   "Luzp2"    "Fut9"    
##  [97] "Prkcq"    "Sulf1"    "Nxph1"    "Plcl1"    "Cfh"      "Sox2ot"  
## [103] "Foxp2"    "Gna14"    "Tac1"     "Trpm5"    "Csmd1"    "Cntn4"   
## [109] "Tafa2"    "Chrm3"    "Trps1"    "Pde4d"    "Pcdh11x"  "Bmp5"    
## [115] "Kctd16"   "Arhgap6"  "Cux2"     "Il1rapl1" "Galnt18"  "Kif26b"  
## [121] "Rab27b"   "Kctd8"    "Kcnd2"    "Hpse2"    "Myh3"     "Adgrd1"  
## [127] "Ghr"      "Prune2"   "Arhgap15" "Calcb"    "Slc4a4"   "Ano5"    
## [133] "Mme"      "Lama2"    "Hs6st3"   "Grik1"    "Dgki"     "Chrna9"  
## [139] "Arhgap18" "Rasgrf1"  "Rerg"     "Mecom"    "Galr1"    "Nell1"   
## [145] "Iqgap2"   "Galntl6"  "Stk31"    "Ror1"     "Nos1"     "C7"      
## [151] "Aebp1"    "Ttc29"    "Flrt2"    "Fgl2"     "Svep1"    "Pdgfra"  
## [157] "Col8a1"   "Synpr"    "Kcnmb2"   "C79798"   "Esrrb"    "Dcn"     
## [163] "Fbxl7"    "Wt1"      "Nox3"     "Cntn3"    "Adamts9"  "Met"     
## [169] "Cdh10"    "Upk1b"    "Cadm2"    "Cbln2"    "Thsd7b"   "Adam2"   
## [175] "Cav1"     "Chrdl1"   "Alcam"    "Il18r1"   "Pde4c"    "Thsd4"   
## [181] "C3"       "Sctr"     "Myocd"    "Ntm"      "Hs3st4"   "Rxfp1"   
## [187] "Grm7"     "Sorcs3"   "Dock10"   "Dapk2"    "Glyat"    "Msln"    
## [193] "Mast4"    "Slc44a4"  "Cadps2"   "Galnt13"  "Ngfr"     "Ephb1"   
## [199] "Olfr78"   "Opcml"    "Plxdc2"   "Slc6a16"  "Mid1"     "Tbx20"   
## [205] "Npas3"    "Zfp286os" "Gabrg3"   "Tenm2"    "Tenm4"    "Igfbp6"  
## [211] "Abca8a"   "Kit"      "Mrvi1"    "Rmst"     "Etl4"     "Ppp3ca"  
## [217] "Tnc"      "Ldlrad2"  "Kirrel3"  "Atg4a"    "Prr16"    "Mir99ahg"
## [223] "Gsta2"    "Rarb"     "Aff2"     "Tex15"    "Dab1"     "Casp8"   
## [229] "Zbbx"     "Dach1"    "Dpp4"     "Calcrl"   "Etv1"     "F13a1"   
## [235] "Tenm1"    "Hccs"     "Serpine2" "Adamts6"  "Otof"     "Rtl4"    
## [241] "Sorcs1"   "Slc39a8"  "Wt1os"    "Cpne8"    "Lmx1a"    "Hcn1"    
## [247] "Adamtsl3" "Pkhd1l1"  "Stxbp6"   "Nell1os"  "Itgbl1"   "Sntg2"   
## [253] "Aldh1a2"  "Fstl4"    "Grp"      "Moxd1"    "Bfsp2"    "Adgrl4"  
## [259] "Scnn1a"   "Ptk7"     "Tafa1"    "Aspa"     "Vcan"     "Loxl1"   
## [265] "Inhbc"    "Pde7b"    "Rarres2"  "Lhfpl2"   "Nkain3"   "Wee2"    
## [271] "Xlr4a"    "Btk"      "Necab1"   "Slc44a5"  "Wbp2nl"   "Scgn"    
## [277] "Ptprt"    "Lrp2"     "L3mbtl4"  "Lrrc4c"   "Nwd1"     "Scn7a"   
## [283] "Pantr1"   "Lmcd1"    "Nek1"     "Upk3b"    "Asb17"    "Bnc1"    
## [289] "Gda"      "Auts2"    "Gabrb1"   "Htr4"     "Fcrl5"    "Col18a1" 
## [295] "Ntrk2"    "Serpinb5" "Slco1a1"  "Prkca"    "Prkag2"   "Pakap"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:25
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 18878
## Number of edges: 755688
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8372
## Number of communities: 33
## Elapsed time: 3 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 145)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 23:42:35 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:42:35 Read 18878 rows and found 25 numeric columns
## 23:42:35 Using Annoy for neighbor search, n_neighbors = 20
## 23:42:35 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:42:38 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpAbnhPC\file824713335f0
## 23:42:38 Searching Annoy index using 1 thread, search_k = 2000
## 23:42:42 Annoy recall = 100%
## 23:42:43 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 23:42:44 Initializing from normalized Laplacian + noise (using irlba)
## 23:42:45 Commencing optimization for 200 epochs, with 568600 positive edges
## 23:43:04 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 4, cols = color.FB)

#DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
#        ncol = 3, cols = color.FB)
GEX.seur$cnt <- factor(gsub(".1$|.2$|.3$|.4$","",as.character(GEX.seur$FB.info)),
                       levels = c("CTL",
                                  "CKO"))
color.cnt <- color.FB[c(1,5)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

check short markers

markers.new.s <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Cntnap5b",
                       "Gabrb1","Nxph1","Lama2","Lrrc7",
                       "Ryr3","Eda","Tac1",
                       "Kctd8","Ntrk2","Penk",
                       "Fut9","Nfatc1","Egfr","Mgll",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2","Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Sst","Adamts9","Kcnn2",
                      "Calb2"),
                 IPAN=c("Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9","Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 CONTAM=c("Actl6b","Phox2b","Sox10","Plp1",
                          "Gfap","Rxrg","Pdgfra","Acta2"))


pm.CL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CTL only")
pm.CL_new.s

pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

DoubletFinder

library(DoubletFinder)
for(i in 1:length(sweep.res.list)){
  if(length(sweep.res.list[[i]]$pANN[is.nan(sweep.res.list[[i]]$pANN)]) !=0){
    if(i!=1){
      sweep.res.list[[i]] <- sweep.res.list[[i-1]]
    }else(
      sweep.res.list[[i]] <- sweep.res.list[[i+1]]
    )
  }
}
sweep.stats <- summarizeSweep(sweep.res.list, GT=FALSE)
bcmvn <- find.pK(sweep.stats)

## NULL
pk_v <- as.numeric(as.character(bcmvn$pK))
pk_good <- pk_v[bcmvn$BCmetric==max(bcmvn$BCmetric)]
# specify expected doublet number     
nExp_poi <- round(0.05*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 6293 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.05"
# specify expected doublet number     
nExp_poi <- round(0.1*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 6293 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.1"
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
  DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")

sort_clusters

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(0,3,4,14,10,8,1,17,28,     # EMN
                                            5,2,6,7,13,16,26,     # IMN
                                            9,20, 22,     # IN
                                            15,11, 12,21, 29,19,32,     # IPAN
                                            27,25,18,31,24,     # Mix
                                            30,     # Glia
                                            23      # SMC
                                            ))

# preAnno1
GEX.seur$preAnno1 <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(0,4,3,14,10,8,1,17,28)] <- "EMN"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(5,2,6,7,13,16,26)] <- "IMN"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(9,20, 22)] <- "IN"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(15,11, 12,21, 29,32,19)] <- "IPAN"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(27,25,30,18,31,24)] <- "MIX"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(30)] <- "Glia"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(23)] <- "SMC"

GEX.seur$preAnno1 <- factor(GEX.seur$preAnno1,
                            levels = c("EMN","IMN","IN","IPAN",
                                       "MIX","Glia","SMC"))
# preAnno2
GEX.seur$preAnno2 <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(0,4,3,14)] <- "EMN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(10)] <- "EMN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(8,1)] <- "EMN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(17,28)] <- "EMN4"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(5,2,6,7)] <- "IMN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(13)] <- "IMN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(16)] <- "IMN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(26)] <- "IMN4"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(9)] <- "IN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(20)] <- "IN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(22)] <- "IN3"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(15,11)] <- "IPAN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(12,21)] <- "IPAN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(29,32)] <- "IPAN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(19)] <- "IPAN4"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(27,25,30,18,31,24)] <- "MIX"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(30)] <- "Glia"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(23)] <- "SMC"

GEX.seur$preAnno2 <- factor(GEX.seur$preAnno2,
                            levels = c(paste0("EMN",1:4),
                                       paste0("IMN",1:4),
                                       paste0("IN",1:3),
                                       paste0("IPAN",1:4),
                                       "MIX","Glia","SMC"))
pm.CL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CL only")
pm.CL_new.s

pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

pm.CL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno2",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CL only")
pm.CL_new.s

pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "preAnno2",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno2") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

#saveRDS(GEX.seur,"GEX0112.seur.pre.rds")

pure

GEX.pure <- subset(GEX.seur, subset = preAnno1 %in% levels(GEX.seur$preAnno1)[1:4] & DoubletFinder0.1 == "Singlet")
GEX.pure
## An object of class Seurat 
## 24081 features across 16562 samples within 1 assay 
## Active assay: RNA (24081 features, 1084 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
pm.CL_new.s <- DotPlot(subset(GEX.pure,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno2",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CL only")
pm.CL_new.s

pm.All_new.s <- DotPlot(GEX.pure, features = as.vector(unlist(markers.new.s)), group.by = "preAnno2",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno2") +
  DimPlot(GEX.pure, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

cell composition

heatmap

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.pure$FB.info,
      clusters=GEX.pure$preAnno2)[c(3:7,9,10),c(1:15)],
                   main = "Cell Count",
                   gaps_row = c(4),
      gaps_col = c(4,8,11,15),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.pure$FB.info,
                                clusters=GEX.pure$preAnno2)[c(3:7,9,10),c(1:15)]),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      gaps_col = c(4,8,11,15),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

barplot

GEX.pure$rep <- paste0("rep",
                        gsub("CTL|CKO","",as.character(GEX.pure$FB.info)))
stat_preAnno2 <- GEX.pure@meta.data[,c("cnt","rep","preAnno2")]

stat_preAnno2.s <- stat_preAnno2 %>%
  group_by(cnt,rep,preAnno2) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno2 <- stat_preAnno2.s %>%
  ggplot(aes(x = preAnno2, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c("skyblue","pink"), name="") +
  labs(title="Cell Composition of SI data, preAnno2", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=preAnno2, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom.preAnno2

sig.test

glm.nb - anova.LRT

Sort.N <- c("CTL","CKO")

glm.nb_anova.preAnno2 <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_preAnno2.s_N <- stat_preAnno2.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      total.N <- stat_preAnno2.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_preAnno2.s_N$total <- total.N[paste0(stat_preAnno2.s_N$cnt,
                                            "_",
                                            stat_preAnno2.s_N$rep),"total"]
      
      glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(stat_preAnno2.s_N$preAnno2)){
        glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_preAnno2.s_N %>% filter(preAnno2==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.preAnno2_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno2)))
rownames(glm.nb_anova.preAnno2_df) <- names(glm.nb_anova.preAnno2)
colnames(glm.nb_anova.preAnno2_df) <- gsub("X","C",colnames(glm.nb_anova.preAnno2_df))
glm.nb_anova.preAnno2_df
##               EMN1      EMN2      EMN3      EMN4     IMN1     IMN2      IMN3
## CTLvsCKO 0.3533798 0.2524307 0.9089482 0.5325865 0.279355 0.100509 0.8366743
##              IMN4          IN1       IN2       IN3      IPAN1     IPAN2
## CTLvsCKO 0.753595 0.0003353706 0.4826819 0.4567131 0.07933713 0.9827769
##              IPAN3     IPAN4 MIC Glia SMC
## CTLvsCKO 0.5146459 0.3950726  NA   NA  NA